Note this script doesn’t use clustered SEs and it uses the full dataframe including defendants who did not receive prison times. Its purpose was to identify relevant correlations between judge/defendant demographics and sentencing outcomes.
#load packages
require(ggplot2)
require(dplyr)
require(tidyr)
require(readxl)
require(openxlsx)
require(broom)
require(stringr)
#load short data frame: case x defendant level
df_short <- readxl::read_excel('../01_preprocessed_data/hidden/full_short.xlsx') %>%
mutate(punishment_prison_days_conditiional = as.numeric(punishment_prison_days_conditiional),
case_district_court = relevel(factor(case_district_court), "Oslo tingrett"))#reorder factor for regr. analysis
### FUNCTION: extract_vars ###
# @param cur_fp: filepath to spreadsheet containing variable list
# @param cur_sheet: sheet name containing variable list
#
# returns: vector of variables
extract_vars <- function(cur_fp, cur_sheet){
cur_df <- readxl::read_excel(cur_fp, sheet = cur_sheet)
return(cur_df$vars)
}
fp_desc <- '../02_config/config_descriptives.xlsx' #file path for descriptive variables
fp_regr <- '../02_config/config_regression.xlsx' #file path for regression variables
#load variables into vectors
defendant_chars_descriptive <- extract_vars(fp_desc, 'defendant_chars')
judge_chars_descriptive <- extract_vars(fp_desc,'judge_chars')
outcomes_descriptive <- extract_vars(fp_desc, 'outcomes')
defendant_chars_regression <- extract_vars(fp_regr, 'defendant_chars')
judge_chars_regression <- extract_vars(fp_regr,'judge_chars')
outcomes_regression <- extract_vars(fp_regr, 'outcomes')
circs_regression <- openxlsx::read.xlsx('../02_config/aggravating_mitigating_circs.xlsx')$col
sections_regression <- openxlsx::read.xlsx('../02_config/sections.xlsx')$col
sections_descriptive <- sections_regression
dir.create('../03_results/exploratory', showWarnings = F)
### FUNCTION: descriptive_analysis ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param variables_to_check: vector of (string) variables to pivot over
#
# @returns: spreadsheet of pivot results
#
# @process: pivot raw_df by each variable in variables_to_check, calculate means for each
# variable in outcomes_descriptive, and add variable counts.
# Display means broken down by outcomes and each variable in variables_to_check.
descriptive_analysis <- function(raw_df, fp, column = '', print, variables_to_check){
#set up master df
desc_master_df <- data.frame()
#loop over variables_to_check
for(variable in variables_to_check){
if(print) print(variable)
#pivot raw_df by variable and calculate means for each variable in outcomes_descriptive
cur_df <- raw_df %>%
group_by_at(variable) %>%
summarise_at(.vars = c(outcomes_descriptive, circs_regression), .funs = mean, na.rm = T)
#calculate counts for each breakdown
cur_df_count <- raw_df %>%
group_by_at(variable) %>%
count()
#join counts and rename variables
cur_df <- cur_df %>%
left_join(cur_df_count, by = variable) %>%
rename(var_value = variable) %>%
mutate(var_value = as.character(var_value), #var_value contains the variable values of 'variable'
grouping_var = variable, #variable we pivoted by
restriction = column) #restriction condition
#bind results to master df
desc_master_df <- desc_master_df %>%
bind_rows(cur_df)
}
#pivot to longer for graphical analysis, only keep outcome means
desc_master_df_short <- desc_master_df %>%
dplyr::select(outcomes_descriptive, var_value, grouping_var) %>%
pivot_longer(cols = all_of(outcomes_descriptive), names_to = 'outcome_char', values_to = 'outcome_value') %>%
filter(!is.na(grouping_var)) #get rid of NAs
#loop over variables_to_check to plot results separately for each var in variables_to_check
for(variable in variables_to_check){
#restrict master_df to 'variable'
cur_df <- desc_master_df_short %>%
dplyr::filter(grouping_var == variable)
#plot results; each plot is for a single variable in variables_to_check
#x-axis are var_values, y-axis are outcome means, facets are different outcomes
p <- ggplot(cur_df, aes(x = var_value, y = outcome_value))+
geom_bar(stat = 'identity', position = 'dodge', fill = 'blue')+
facet_wrap(.~outcome_char, scales = 'free_y')+
labs(title = variable, x = 'characteristic',
y = 'mean outcome value')+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 5))
#print if 'print' is set to true
if(print) print(p)
}
#save master df (in wide format) at fp
write.csv(desc_master_df, fp)
#return master df (in wide format)
return(desc_master_df)
}
### FUNCTION: regression_analysis ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param variables_to_check: vector of (string) variables; independent variables
# @param controls: vector of variables to control for in 'Pers.' and 'Conv.' regressions
# @separate_displays: vector of variables for which to create separate graphs
#
# @returns: spreadsheet of regression results
#
# @process: loop over each variable in variables_to_check and each outcome in outcomes_regressions.
# regress outcome on variable without controls (None), with demographic controls and controls in 'controls' ('Pers.'),
# and the above-mentioned controls plus dummy variables for each conviction section ('Conv.').
# Display results for each variable in variables_to_check by outcome and model type (i.e., the level of controls). Make separate graphs
# for factor variables with a lot of levels that are contained in separate_displays
regression_analysis <- function(raw_df, fp, column, print, variables_to_check, controls, seperate_displays = c(' ')){
#set up master df
reg_master_df <- data.frame()
#loop over variables_to_check
for(variable in variables_to_check){
if(print) print(variable)
#loop over outcomes_regression
for(outcome in outcomes_regression){
#No controls regression#
#set up formula
fr <- paste0(outcome, ' ~ ', variable)
#fit; catch errors
cur_fit <- try(lm(formula = fr, data = raw_df))
if(is(cur_fit, 'try-error')){
print(paste0('Failed: ', fr))
next
}
#convert fit to df and correctly set up variables
cur_fit_df <- cur_fit %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(variable, term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'None',
restriction = column)
#merge results with master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_df)
}
}
#loop over outcomes_regression
for(outcome in outcomes_regression){
#Personal controls regression#
#set up formula
fr_pers <- paste0(outcome, ' ~ ', paste(c(variables_to_check, controls), collapse = ' + '))
#fit with demographic controls; catch errors
cur_fit_pers <- try(lm(formula = fr_pers, data = raw_df))
if(is(cur_fit_pers, 'try-error')){
print(paste0('Failed: ', fr_pers))
next
}
#convert model to df
cur_fit_pers_df <- cur_fit_pers %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(paste(variables_to_check, collapse = '|'), term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'Pers.',
restriction = column)
#merge results with master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_pers_df)
#set up formula for model including conviction controls
fr_conv <- paste0(outcome, ' ~ .')
#keep only outcome variables and variables used as controls
raw_df_restricted <- raw_df %>%
dplyr::select(all_of(c(outcome, variables_to_check, controls, sections_regression, circs_regression)))
#fit with all controls; catch errors
cur_fit_conv <- lm(formula = fr_conv, data = raw_df_restricted)
if(is(cur_fit_conv, 'try-error')){
print(paste0('Failed: ', fr_conv))
next
}
#convert model to df
cur_fit_conv_df <- cur_fit_conv %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(paste(variables_to_check, collapse = '|'), term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'Conv.',
restriction = column)
#bind to master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_conv_df)
}
#set up order of levels for controls variable
reg_master_df$controls <- factor(reg_master_df$controls, levels = c('None', 'Pers.', 'Conv.'))
#generate graphs if print is set to true
if(print){
#loop over outcomes_regression
for(cur_outcome in outcomes_regression){
#restrict current dataframe to the current outcome variable and remove independent variables
#contained in 'separate_displays' vector
cur_df <- reg_master_df %>%
filter(outcome == cur_outcome, !grepl(paste(seperate_displays, collapse = '|'), term))
#generate plot: x-axis is types of controls used, y-axis is effect size, color is significance level
#facet is the independent variable; allow y-axis scale to vary
p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
geom_point()+
geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
facet_wrap(.~term, scales = 'free_y')+
labs(x = 'Controls',
y = 'Effect Size',
title = cur_outcome)
print(p)
#generate separate graphs for variables in 'seperate_displays'
for(sep_var in seperate_displays){
cur_df <- reg_master_df %>%
filter(outcome == cur_outcome, grepl(sep_var, term)) %>%
mutate(term_short = stringr::str_replace(term, sep_var, ''))
if(nrow(cur_df) == 0) next
p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
geom_point()+
geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
facet_wrap(.~term_short)+
labs(x = 'Controls',
y = 'Effect Size',
title = cur_outcome)
print(p)
}
}
}
#save master df at fp
write.csv(reg_master_df, fp)
#return master df
return(reg_master_df)
}
#run descriptive analysis on defendant characteristics
df <- descriptive_analysis(df_short, '../03_results/exploratory/rq1_defendant_outcomes.csv', 'all', TRUE, defendant_chars_descriptive)
## [1] "defendant_age_groups"
## [1] "defendant_is_woman"
## [1] "defendant_some_foreign_background"
## [1] "defendant_is_married"
## [1] "defendant_net_income_quantile"
## [1] "case_is_not_oslo"
#run regression analysis on defendant characteristics
df <- regression_analysis(df_short, '../03_results/exploratory/rq1_regression_results.csv', 'all', TRUE, defendant_chars_regression, c())
## [1] "defendant_age"
## [1] "defendant_is_woman"
## [1] "defendant_some_foreign_background"
## [1] "defendant_log_net_income"
## [1] "case_is_not_oslo"
## [1] "defendant_is_married"
dir.create('../03_results/exploratory/rq2', showWarnings = F)
dir.create('../03_results/exploratory/rq2/defendant', showWarnings = F)
#set up master df
rq2_master_df_regr <- data.frame()
#loop over conviction variables
for (column in sections_regression) {
#restrict dataset to individuals who have been convicted of a certain crime
crime_df <- df_short %>% filter(.data[[column]] == 1)
# Check the number of rows in crime_df
if (nrow(crime_df) <= 100) {
next # Skip the current iteration if crime_df has 100 or more rows
}
if (column == "convicted_stay_of_execution_(conditional_imprisonment_)")
{
next
}
print(column)
#set up filenames to save results
filename_regression <- paste0('../03_results/exploratory/rq2/defendant/', column, '_regression.csv')
filename_descriptive <- paste0('../03_results/exploratory/rq2/defendant/', column, '_descriptive.csv')
#run regression analysis for dataset restricted by crime
cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE, defendant_chars_regression, c())
#run descriptive analysis for dataset restricted by crime
df <- descriptive_analysis(crime_df, filename_descriptive, column, FALSE,defendant_chars_descriptive)
#bind regression results to master df
rq2_master_df_regr <- rq2_master_df_regr %>%
bind_rows(cur_df)
}
## [1] "convicted_stay_of_execution_.conditional_imprisonment_."
## [1] "convicted_bodily_injury"
## [1] "convicted_drug_offense"
## [1] "convicted_illegal_arming_in_a_public_place"
## [1] "convicted_participation"
## [1] "convicted_general_violation_of_traffic_act_1"
## [1] "convicted_drinking_within_six_hours_of_traffic_investigation"
## [1] "convicted_acquisition_possession_and_use_of_drugs"
## [1] "convicted_section_24_of_the_medicines_act_other"
## [1] "convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime."
## [1] "convicted_road_traffic_act_other_1"
## [1] "convicted_violating_traffic_signage_act"
## [1] "convicted_temporary_suspension_of_driving_lisence"
## [1] "convicted_assault_and_battery"
## [1] "convicted_reckless_behavior"
## [1] "convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint"
## [1] "convicted_damage"
## [1] "convicted_threats"
## [1] "convicted_serious_drug_offense"
## [1] "convicted_theft"
## [1] "convicted_road_traffic_act_other_5"
## [1] "convicted_violating_basic_traffic_rules"
## [1] "convicted_loss_of_the_right_to_drive_a_motor_vehicle."
## [1] "convicted_obstruction_of_a_public_official"
## [1] "convicted_fraud"
## [1] "convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official"
## [1] "convicted_violation_of_police_act"
## [1] "convicted_disorderly_conduct"
## [1] "convicted_incorrect_explanation"
#restrict regression results to results that are significant at p < 0.05 and models with full controls
rq2_sig_fc <- rq2_master_df_regr %>%
filter(controls == 'Conv.', p.value < 0.05)
#save significant results
write.csv(rq2_sig_fc, '../03_results/exploratory/rq2_significant_results.csv')
#find crimes for which punishments are most likely to correlate with defendants' demographics
top_crimes <- rq2_sig_fc %>%
group_by(restriction) %>%
count() %>%
arrange(desc(n))
print(head(top_crimes, n = 10))
## # A tibble: 10 × 2
## # Groups: restriction [10]
## restriction n
## <chr> <int>
## 1 convicted_general_violation_of_traffic_act_1 5
## 2 convicted_drug_offense 4
## 3 convicted_reckless_behavior 4
## 4 convicted_violating_traffic_signage_act 4
## 5 convicted_acquisition_possession_and_use_of_drugs 3
## 6 convicted_damage 3
## 7 convicted_road_traffic_act_other_1 3
## 8 convicted_section_24_of_the_medicines_act_other 3
## 9 convicted_temporary_suspension_of_driving_lisence 3
## 10 convicted_assault_and_battery 2
#find defendant characteristics that affect the severity of judgements for the greatest number of crimes
#(counting each outcome separately)
top_char_all_outcomes <- rq2_sig_fc %>%
group_by(term) %>%
count() %>%
arrange(desc(n))
print(head(top_char_all_outcomes))
## # A tibble: 6 × 2
## # Groups: term [6]
## term n
## <chr> <int>
## 1 defendant_is_married 15
## 2 defendant_log_net_income 14
## 3 defendant_age 13
## 4 defendant_is_woman 8
## 5 case_is_not_oslo 5
## 6 defendant_some_foreign_background 3
#find defendant characteristics that affect the severity of judgements for the greatest number of crimes
#(not counting each outcome separately)
top_char_sections <- rq2_sig_fc %>%
group_by(term, restriction) %>%
count() %>%
group_by(term) %>%
count() %>%
arrange(desc(n))
print(head(top_char_sections, n = 10))
## # A tibble: 6 × 2
## # Groups: term [6]
## term n
## <chr> <int>
## 1 defendant_is_married 13
## 2 defendant_log_net_income 12
## 3 defendant_age 11
## 4 defendant_is_woman 8
## 5 case_is_not_oslo 4
## 6 defendant_some_foreign_background 3
#types of outcomes that are associated with defendants' demographics the most frequently
top_outcomes <- rq2_sig_fc %>%
group_by(outcome) %>%
count() %>%
arrange(desc(n))
print(top_outcomes)
## # A tibble: 3 × 2
## # Groups: outcome [3]
## outcome n
## <chr> <int>
## 1 punishment_fine 27
## 2 punishment_prison_days 17
## 3 judgement_convicted_binary 14
#find which defendant demographic is most frequently related to which outcome
defendant_outcome_freq <- as.data.frame(table(rq2_sig_fc$term, rq2_sig_fc$outcome)) %>%
arrange(desc(Freq))
print(defendant_outcome_freq)
## Var1 Var2 Freq
## 1 defendant_is_married punishment_fine 8
## 2 defendant_log_net_income punishment_fine 8
## 3 defendant_age punishment_prison_days 8
## 4 defendant_is_woman judgement_convicted_binary 5
## 5 defendant_age punishment_fine 5
## 6 defendant_is_married judgement_convicted_binary 4
## 7 defendant_log_net_income punishment_prison_days 4
## 8 case_is_not_oslo punishment_fine 3
## 9 defendant_is_married punishment_prison_days 3
## 10 defendant_log_net_income judgement_convicted_binary 2
## 11 defendant_some_foreign_background judgement_convicted_binary 2
## 12 defendant_is_woman punishment_fine 2
## 13 case_is_not_oslo judgement_convicted_binary 1
## 14 defendant_some_foreign_background punishment_fine 1
## 15 case_is_not_oslo punishment_prison_days 1
## 16 defendant_is_woman punishment_prison_days 1
## 17 defendant_age judgement_convicted_binary 0
## 18 defendant_some_foreign_background punishment_prison_days 0
#set up variable that is one if the effect is positive (characteristic leads to higher number in the outcome category)
rq2_sig_fc <- rq2_sig_fc %>%
dplyr::mutate(positive_sign = ifelse(estimate > 0, 1, 0))
#find which defendant characteristic leads to increases/decreases in the outcome category most frequently
defendant_outcome_freq_pos <- as.data.frame(table(rq2_sig_fc$term, rq2_sig_fc$outcome, rq2_sig_fc$positive_sign)) %>%
arrange(desc(Freq)) %>%
rename(defendant_char = Var1,
outcome = Var2,
positive_sign = Var3)
#find which defendant characteristic is associated with increases in the outcome category the greatest number of times
defendant_outcome_freq_pos %>%
filter(positive_sign == 1) %>%
head(n = 10) %>%
print()
## defendant_char outcome positive_sign
## 1 defendant_log_net_income punishment_fine 1
## 2 defendant_age punishment_prison_days 1
## 3 defendant_age punishment_fine 1
## 4 defendant_is_married punishment_fine 1
## 5 defendant_is_woman judgement_convicted_binary 1
## 6 defendant_is_married judgement_convicted_binary 1
## 7 defendant_some_foreign_background judgement_convicted_binary 1
## 8 defendant_is_married punishment_prison_days 1
## 9 case_is_not_oslo judgement_convicted_binary 1
## 10 defendant_some_foreign_background punishment_fine 1
## Freq
## 1 8
## 2 8
## 3 5
## 4 4
## 5 3
## 6 2
## 7 2
## 8 2
## 9 1
## 10 1
#find which defendant characteristic is associated with decreases in the outcome category the greatest number of times
defendant_outcome_freq_pos %>%
filter(positive_sign == 0) %>%
head(n = 10) %>%
print()
## defendant_char outcome positive_sign Freq
## 1 defendant_is_married punishment_fine 0 4
## 2 defendant_log_net_income punishment_prison_days 0 4
## 3 case_is_not_oslo punishment_fine 0 3
## 4 defendant_is_married judgement_convicted_binary 0 2
## 5 defendant_is_woman judgement_convicted_binary 0 2
## 6 defendant_log_net_income judgement_convicted_binary 0 2
## 7 defendant_is_woman punishment_fine 0 2
## 8 case_is_not_oslo punishment_prison_days 0 1
## 9 defendant_is_married punishment_prison_days 0 1
## 10 defendant_is_woman punishment_prison_days 0 1
#For each defendant characteristic and outcome category, find crimes for which there is a significant effect
#print the crimes where the effect is the biggest
for(cur_defendant in defendant_chars_regression){
for(cur_outcome in outcomes_regression){
cur_df <- rq2_sig_fc %>%
dplyr::select(term, outcome, restriction, estimate, p.value) %>%
dplyr::rename(characteristic = term,
crime = restriction) %>%
filter(characteristic == cur_defendant, outcome == cur_outcome) %>%
slice_max(abs(estimate), n = 10) %>%
print()
}
}
## [1] characteristic outcome crime estimate p.value
## <0 rows> (or 0-length row.names)
## characteristic outcome
## 1 defendant_age punishment_prison_days
## 2 defendant_age punishment_prison_days
## 3 defendant_age punishment_prison_days
## 4 defendant_age punishment_prison_days
## 5 defendant_age punishment_prison_days
## 6 defendant_age punishment_prison_days
## 7 defendant_age punishment_prison_days
## 8 defendant_age punishment_prison_days
## crime
## 1 convicted_drug_offense
## 2 convicted_road_traffic_act_other_5
## 3 convicted_illegal_arming_in_a_public_place
## 4 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## 5 convicted_temporary_suspension_of_driving_lisence
## 6 convicted_violating_traffic_signage_act
## 7 convicted_general_violation_of_traffic_act_1
## 8 convicted_drinking_within_six_hours_of_traffic_investigation
## estimate p.value
## 1 3.4297419 0.0182207268
## 2 3.3465849 0.0480281313
## 3 3.2203970 0.0005294762
## 4 1.3118823 0.0261630056
## 5 1.2006079 0.0002596586
## 6 0.7925754 0.0137070858
## 7 0.5082964 0.0006492344
## 8 0.4913532 0.0010669092
## characteristic outcome crime
## 1 defendant_age punishment_fine convicted_road_traffic_act_other_1
## 2 defendant_age punishment_fine convicted_violating_traffic_signage_act
## 3 defendant_age punishment_fine convicted_reckless_behavior
## 4 defendant_age punishment_fine convicted_general_violation_of_traffic_act_1
## 5 defendant_age punishment_fine convicted_assault_and_battery
## estimate p.value
## 1 213.72790 0.004532597
## 2 201.03767 0.007543465
## 3 137.82396 0.044210147
## 4 109.57488 0.011120759
## 5 56.19813 0.012575137
## characteristic outcome
## 1 defendant_is_woman judgement_convicted_binary
## 2 defendant_is_woman judgement_convicted_binary
## 3 defendant_is_woman judgement_convicted_binary
## 4 defendant_is_woman judgement_convicted_binary
## 5 defendant_is_woman judgement_convicted_binary
## crime
## 1 convicted_violation_of_police_act
## 2 convicted_assault_and_battery
## 3 convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official
## 4 convicted_reckless_behavior
## 5 convicted_drug_offense
## estimate p.value
## 1 -2.792629e-15 0.001155201
## 2 -2.538348e-15 0.002411851
## 3 1.066977e-15 0.016410674
## 4 2.965502e-16 0.048688345
## 5 8.357818e-17 0.011273566
## characteristic outcome crime
## 1 defendant_is_woman punishment_prison_days convicted_incorrect_explanation
## estimate p.value
## 1 -13.49623 0.04608169
## characteristic outcome
## 1 defendant_is_woman punishment_fine
## 2 defendant_is_woman punishment_fine
## crime estimate p.value
## 1 convicted_general_violation_of_traffic_act_1 -3379.833 0.03489335
## 2 convicted_damage -3224.115 0.02903713
## characteristic outcome
## 1 defendant_some_foreign_background judgement_convicted_binary
## 2 defendant_some_foreign_background judgement_convicted_binary
## crime estimate p.value
## 1 convicted_damage 1.436724e-15 0.0008344268
## 2 convicted_violating_basic_traffic_rules 2.424160e-16 0.0156587539
## [1] characteristic outcome crime estimate p.value
## <0 rows> (or 0-length row.names)
## characteristic outcome
## 1 defendant_some_foreign_background punishment_fine
## crime estimate p.value
## 1 convicted_temporary_suspension_of_driving_lisence 2941.334 0.04626381
## characteristic outcome
## 1 defendant_log_net_income judgement_convicted_binary
## 2 defendant_log_net_income judgement_convicted_binary
## crime
## 1 convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official
## 2 convicted_reckless_behavior
## estimate p.value
## 1 -7.957705e-17 0.03988181
## 2 -2.835157e-17 0.04331161
## characteristic outcome
## 1 defendant_log_net_income punishment_prison_days
## 2 defendant_log_net_income punishment_prison_days
## 3 defendant_log_net_income punishment_prison_days
## 4 defendant_log_net_income punishment_prison_days
## crime estimate p.value
## 1 convicted_participation -16.632751 5.577127e-03
## 2 convicted_section_24_of_the_medicines_act_other -5.236602 2.944234e-05
## 3 convicted_acquisition_possession_and_use_of_drugs -5.007349 1.540773e-04
## 4 convicted_threats -3.541713 6.663680e-03
## characteristic outcome
## 1 defendant_log_net_income punishment_fine
## 2 defendant_log_net_income punishment_fine
## 3 defendant_log_net_income punishment_fine
## 4 defendant_log_net_income punishment_fine
## 5 defendant_log_net_income punishment_fine
## 6 defendant_log_net_income punishment_fine
## 7 defendant_log_net_income punishment_fine
## 8 defendant_log_net_income punishment_fine
## crime
## 1 convicted_stay_of_execution_.conditional_imprisonment_.
## 2 convicted_drinking_within_six_hours_of_traffic_investigation
## 3 convicted_loss_of_the_right_to_drive_a_motor_vehicle.
## 4 convicted_general_violation_of_traffic_act_1
## 5 convicted_temporary_suspension_of_driving_lisence
## 6 convicted_section_24_of_the_medicines_act_other
## 7 convicted_acquisition_possession_and_use_of_drugs
## 8 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## estimate p.value
## 1 1086.1980 3.732625e-02
## 2 1065.0868 3.158167e-09
## 3 1024.1496 2.564026e-02
## 4 879.4619 1.877034e-10
## 5 641.7874 2.804050e-06
## 6 546.8547 4.380510e-03
## 7 546.6866 3.938085e-03
## 8 373.6703 5.221588e-05
## characteristic outcome crime estimate
## 1 case_is_not_oslo judgement_convicted_binary convicted_fraud 1.677324e-15
## p.value
## 1 0.03762771
## characteristic outcome crime estimate p.value
## 1 case_is_not_oslo punishment_prison_days convicted_fraud -34.51369 0.01005474
## characteristic outcome crime
## 1 case_is_not_oslo punishment_fine convicted_road_traffic_act_other_1
## 2 case_is_not_oslo punishment_fine convicted_violating_traffic_signage_act
## 3 case_is_not_oslo punishment_fine convicted_theft
## estimate p.value
## 1 -7476.818 0.01258817
## 2 -6274.398 0.03632426
## 3 -1941.121 0.02022391
## characteristic outcome
## 1 defendant_is_married judgement_convicted_binary
## 2 defendant_is_married judgement_convicted_binary
## 3 defendant_is_married judgement_convicted_binary
## 4 defendant_is_married judgement_convicted_binary
## crime estimate p.value
## 1 convicted_serious_drug_offense -4.575035e-15 0.02205752
## 2 convicted_road_traffic_act_other_5 -3.496628e-15 0.02876892
## 3 convicted_violating_traffic_signage_act 2.420500e-15 0.01881458
## 4 convicted_road_traffic_act_other_1 1.673703e-15 0.02175401
## characteristic outcome
## 1 defendant_is_married punishment_prison_days
## 2 defendant_is_married punishment_prison_days
## 3 defendant_is_married punishment_prison_days
## crime estimate p.value
## 1 convicted_serious_drug_offense 1222.37492 0.002099873
## 2 convicted_drug_offense 212.94071 0.005542127
## 3 convicted_general_violation_of_traffic_act_1 -17.81272 0.006536267
## characteristic outcome
## 1 defendant_is_married punishment_fine
## 2 defendant_is_married punishment_fine
## 3 defendant_is_married punishment_fine
## 4 defendant_is_married punishment_fine
## 5 defendant_is_married punishment_fine
## 6 defendant_is_married punishment_fine
## 7 defendant_is_married punishment_fine
## 8 defendant_is_married punishment_fine
## crime estimate p.value
## 1 convicted_section_24_of_the_medicines_act_other -10709.170 0.0064693484
## 2 convicted_acquisition_possession_and_use_of_drugs -9811.794 0.0108611354
## 3 convicted_reckless_behavior -9602.164 0.0007878867
## 4 convicted_violation_of_police_act 7806.936 0.0442773840
## 5 convicted_damage 5956.355 0.0179288895
## 6 convicted_drug_offense 4837.917 0.0263842150
## 7 convicted_theft 4790.092 0.0382015081
## 8 convicted_threats -2316.773 0.0206096997
# Icebox for now
#
# prefix <- "chapter_convicted"
#
# prefix_columns <- grep(paste0("^", prefix), names(df_short), value = TRUE)
#
# rq2_master_df_regr <- data.frame()
#
# for (column in prefix_columns) {
# crime_df <- df_short %>% filter(.data[[column]] == 1)
#
# # Check the number of rows in crime_df
# if (nrow(crime_df) <= 200) {
# next # Skip the current iteration if crime_df has 100 or more rows
# }
#
# print(column)
#
# filename_regression <- paste0('../03_results/exploratory/rq2/chapter/', column, '_regression.csv')
# filename_descriptive <- paste0('../03_results/exploratory/rq2/chapter/', column, '_descriptive.csv')
#
# cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE,defendant_chars_regression, c())
# df <- descriptive_analysis(crime_df, filename_descriptive, column, FALSE,defendant_chars_descriptive)
# rq2_master_df_regr <- rq2_master_df_regr %>%
# bind_rows(cur_df)
# }
#run descriptive analysis for judge characteristics
df <- descriptive_analysis(df_short, '../03_results/exploratory/rq3_judge_outcomes.csv', 'all', TRUE,judge_chars_descriptive)
## [1] "judge_age_groups"
## [1] "judge_is_woman"
## [1] "judge_some_foreign_background"
## [1] "judge_is_married"
## [1] "judge_net_income_quantile"
## [1] "case_is_not_oslo"
#run regression analysis for defendant characteristics
df <- regression_analysis(df_short, '../03_results/exploratory/rq3_judge_regression_results.csv', 'all', TRUE,judge_chars_regression, defendant_chars_regression, c('judge_party', 'case_district_court'))
## [1] "judge_age"
## [1] "judge_is_woman"
## [1] "judge_some_foreign_background"
## [1] "judge_log_net_income"
## [1] "judge_is_married"
dir.create('../03_results/exploratory/rq2/', showWarnings = F)
dir.create('../03_results/exploratory/rq2/judge/', showWarnings = F)
#extract all dummy variables for individual crimes people were convicted of
prefix <- "convicted_"
prefix_columns <- grep(paste0("^", prefix), names(df_short), value = TRUE)
#set up master df
rq3a_master_df_regr <- data.frame()
#loop over crime variables
for (column in prefix_columns) {
#restrict data frame to people convucted of individual crimes
crime_df <- df_short %>% filter(.data[[column]] == 1)
# Check the number of rows in crime_df
if (nrow(crime_df) <= 100) {
next # Skip the current iteration if crime_df has 100 or fewer rows
}
print(column)
#set up filepaths to save results in
filename_regression <- paste0('../03_results/exploratory/rq2/judge/', column, '_regression.csv')
filename_descriptive <- paste0('../03_results/exploratory/rq2/judge/', column, '_descriptive.csv')
#run regression analysis for dfs restricted to people convicted of individual crimes
cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE,judge_chars_regression, defendant_chars_regression)
#bind to master df
rq3a_master_df_regr <- rq3a_master_df_regr %>%
bind_rows(cur_df)
}
## [1] "convicted_stay_of_execution_.conditional_imprisonment_."
## [1] "convicted_bodily_injury"
## [1] "convicted_drug_offense"
## [1] "convicted_illegal_arming_in_a_public_place"
## [1] "convicted_participation"
## [1] "convicted_general_violation_of_traffic_act_1"
## [1] "convicted_drinking_within_six_hours_of_traffic_investigation"
## [1] "convicted_acquisition_possession_and_use_of_drugs"
## [1] "convicted_section_24_of_the_medicines_act_other"
## [1] "convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime."
## [1] "convicted_road_traffic_act_other_1"
## [1] "convicted_violating_traffic_signage_act"
## [1] "convicted_temporary_suspension_of_driving_lisence"
## [1] "convicted_assault_and_battery"
## [1] "convicted_reckless_behavior"
## [1] "convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint"
## [1] "convicted_damage"
## [1] "convicted_threats"
## [1] "convicted_serious_drug_offense"
## [1] "convicted_theft"
## [1] "convicted_road_traffic_act_other_5"
## [1] "convicted_violating_basic_traffic_rules"
## [1] "convicted_loss_of_the_right_to_drive_a_motor_vehicle."
## [1] "convicted_obstruction_of_a_public_official"
## [1] "convicted_fraud"
## [1] "convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official"
## [1] "convicted_violation_of_police_act"
## [1] "convicted_disorderly_conduct"
## [1] "convicted_incorrect_explanation"
#restrict master df to models with full controls and results that were significant at the p < 0.05 level
rq3a_sig_fc <- rq3a_master_df_regr %>%
filter(controls == 'Conv.', p.value < 0.05)
#save df with significant results
write.csv(rq3a_sig_fc, '../03_results/exploratory/rq3a_significant_results.csv')
#find crimes where outcomes were most commonly associated with judges' demographics
top_crimes <- rq3a_sig_fc %>%
group_by(restriction) %>%
count() %>%
arrange(desc(n))
print(head(top_crimes, n = 10))
## # A tibble: 10 × 2
## # Groups: restriction [10]
## restriction n
## <chr> <int>
## 1 convicted_participation 4
## 2 convicted_incorrect_explanation 3
## 3 convicted_road_traffic_act_other_5 3
## 4 convicted_bodily_injury 2
## 5 convicted_disorderly_conduct 2
## 6 convicted_fraud 2
## 7 convicted_obstruction_of_a_public_official 2
## 8 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_re… 2
## 9 convicted_damage 1
## 10 convicted_general_violation_of_traffic_act_1 1
#find judges' characteristics that are significantly associated with outcomes for the lparamest number of individual crimes
#multiple outcomes per crime are counted multiple times
top_char_all_outcomes <- rq3a_sig_fc %>%
group_by(term) %>%
count() %>%
arrange(desc(n))
print(head(top_char_all_outcomes))
## # A tibble: 5 × 2
## # Groups: term [5]
## term n
## <chr> <int>
## 1 judge_age 8
## 2 judge_some_foreign_background 6
## 3 judge_is_married 5
## 4 judge_log_net_income 5
## 5 judge_is_woman 3
#find judges' characteristics that are significantly associated with outcomes for the lparamest number of individual crimes
#multiple outcomes per crime are counted a single time
top_char_sections <- rq3a_sig_fc %>%
group_by(term, restriction) %>%
count() %>%
group_by(term) %>%
count() %>%
arrange(desc(n))
print(head(top_char_sections, n = 10))
## # A tibble: 5 × 2
## # Groups: term [5]
## term n
## <chr> <int>
## 1 judge_age 7
## 2 judge_is_married 5
## 3 judge_log_net_income 5
## 4 judge_some_foreign_background 5
## 5 judge_is_woman 3
#find outcomes that are associated with judges' characteristics for the greatest number of crimes
top_outcomes <- rq3a_sig_fc %>%
group_by(outcome) %>%
count() %>%
arrange(desc(n))
print(top_outcomes)
## # A tibble: 3 × 2
## # Groups: outcome [3]
## outcome n
## <chr> <int>
## 1 punishment_fine 13
## 2 judgement_convicted_binary 7
## 3 punishment_prison_days 7
#find which judge characteristic is associated with which outcome for the greatest number of crimes
judge_outcome_freq <- as.data.frame(table(rq3a_sig_fc$term, rq3a_sig_fc$outcome)) %>%
arrange(desc(Freq))
print(judge_outcome_freq)
## Var1 Var2 Freq
## 1 judge_age punishment_fine 5
## 2 judge_log_net_income punishment_fine 3
## 3 judge_some_foreign_background punishment_prison_days 3
## 4 judge_is_woman judgement_convicted_binary 2
## 5 judge_log_net_income judgement_convicted_binary 2
## 6 judge_is_married punishment_fine 2
## 7 judge_some_foreign_background punishment_fine 2
## 8 judge_age punishment_prison_days 2
## 9 judge_is_married punishment_prison_days 2
## 10 judge_age judgement_convicted_binary 1
## 11 judge_is_married judgement_convicted_binary 1
## 12 judge_some_foreign_background judgement_convicted_binary 1
## 13 judge_is_woman punishment_fine 1
## 14 judge_is_woman punishment_prison_days 0
## 15 judge_log_net_income punishment_prison_days 0
#set up variable for whether impact on outcome was positive or negative
rq3a_sig_fc <- rq3a_sig_fc %>%
dplyr::mutate(positive_sign = ifelse(estimate > 0, 1, 0))
#find which judge characteristic is associated with increases/decreases
#in outcome variables for the greatest number of crimes
judge_outcome_freq_pos <- as.data.frame(table(rq3a_sig_fc$term, rq3a_sig_fc$outcome, rq3a_sig_fc$positive_sign)) %>%
arrange(desc(Freq)) %>%
rename(defendant_char = Var1,
outcome = Var2,
positive_sign = Var3)
#find judge charactistics that lead to increases in an outcome variable for the greatest number of crimes
judge_outcome_freq_pos %>%
filter(positive_sign == 1) %>%
head(n = 10) %>%
print()
## defendant_char outcome positive_sign Freq
## 1 judge_age punishment_fine 1 4
## 2 judge_is_woman judgement_convicted_binary 1 2
## 3 judge_some_foreign_background punishment_fine 1 2
## 4 judge_age punishment_prison_days 1 2
## 5 judge_age judgement_convicted_binary 1 1
## 6 judge_is_married judgement_convicted_binary 1 1
## 7 judge_log_net_income judgement_convicted_binary 1 1
## 8 judge_some_foreign_background judgement_convicted_binary 1 1
## 9 judge_is_married punishment_fine 1 0
## 10 judge_is_woman punishment_fine 1 0
#find judge charactistics that lead to decreases in an outcome variable for the greatest number of crimes
judge_outcome_freq_pos %>%
filter(positive_sign == 0) %>%
head(n = 10) %>%
print()
## defendant_char outcome positive_sign Freq
## 1 judge_log_net_income punishment_fine 0 3
## 2 judge_some_foreign_background punishment_prison_days 0 3
## 3 judge_is_married punishment_fine 0 2
## 4 judge_is_married punishment_prison_days 0 2
## 5 judge_log_net_income judgement_convicted_binary 0 1
## 6 judge_age punishment_fine 0 1
## 7 judge_is_woman punishment_fine 0 1
## 8 judge_age judgement_convicted_binary 0 0
## 9 judge_is_married judgement_convicted_binary 0 0
## 10 judge_is_woman judgement_convicted_binary 0 0
#find crimes where judges' characteristics have the biggest impact on sentencing outcomes
for(cur_judge in judge_chars_regression){
for(cur_outcome in outcomes_regression){
cur_df <- rq3a_sig_fc %>%
dplyr::select(term, outcome, restriction, estimate, p.value) %>%
dplyr::rename(characteristic = term,
crime = restriction) %>%
filter(characteristic == cur_judge, outcome == cur_outcome) %>%
slice_max(abs(estimate), n = 10) %>%
print()
}
}
## characteristic outcome crime estimate
## 1 judge_age judgement_convicted_binary convicted_fraud 8.587035e-17
## p.value
## 1 0.001905894
## characteristic outcome
## 1 judge_age punishment_prison_days
## 2 judge_age punishment_prison_days
## crime
## 1 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## 2 convicted_temporary_suspension_of_driving_lisence
## estimate p.value
## 1 1.325579 0.03927478
## 2 0.679432 0.04823379
## characteristic outcome crime
## 1 judge_age punishment_fine convicted_road_traffic_act_other_5
## 2 judge_age punishment_fine convicted_threats
## 3 judge_age punishment_fine convicted_obstruction_of_a_public_official
## 4 judge_age punishment_fine convicted_incorrect_explanation
## 5 judge_age punishment_fine convicted_fraud
## estimate p.value
## 1 -867.03683 2.177773e-02
## 2 113.35012 2.022627e-04
## 3 110.19710 4.713502e-02
## 4 62.62385 5.651788e-08
## 5 48.50448 2.471014e-02
## characteristic outcome crime
## 1 judge_is_woman judgement_convicted_binary convicted_road_traffic_act_other_5
## 2 judge_is_woman judgement_convicted_binary convicted_participation
## estimate p.value
## 1 2.102379e-15 0.001281134
## 2 1.249239e-15 0.042536879
## [1] characteristic outcome crime estimate p.value
## <0 rows> (or 0-length row.names)
## characteristic outcome crime estimate p.value
## 1 judge_is_woman punishment_fine convicted_damage -2321.286 0.03283881
## characteristic outcome
## 1 judge_some_foreign_background judgement_convicted_binary
## crime estimate p.value
## 1 convicted_disorderly_conduct 5.951542e-15 0.0003184837
## characteristic outcome
## 1 judge_some_foreign_background punishment_prison_days
## 2 judge_some_foreign_background punishment_prison_days
## 3 judge_some_foreign_background punishment_prison_days
## crime estimate p.value
## 1 convicted_bodily_injury -132.02808 0.02439854
## 2 convicted_violating_basic_traffic_rules -62.03556 0.03464084
## 3 convicted_disorderly_conduct -47.00880 0.03199822
## characteristic outcome
## 1 judge_some_foreign_background punishment_fine
## 2 judge_some_foreign_background punishment_fine
## crime estimate p.value
## 1 convicted_participation 7293.264 0.01807152
## 2 convicted_general_violation_of_traffic_act_1 6843.366 0.02590505
## characteristic outcome crime
## 1 judge_log_net_income judgement_convicted_binary convicted_participation
## 2 judge_log_net_income judgement_convicted_binary convicted_bodily_injury
## estimate p.value
## 1 2.806408e-15 4.808496e-05
## 2 -1.748549e-15 2.631840e-04
## [1] characteristic outcome crime estimate p.value
## <0 rows> (or 0-length row.names)
## characteristic outcome
## 1 judge_log_net_income punishment_fine
## 2 judge_log_net_income punishment_fine
## 3 judge_log_net_income punishment_fine
## crime estimate p.value
## 1 convicted_obstruction_of_a_public_official -2658.4411 0.023575930
## 2 convicted_theft -1441.7071 0.038169309
## 3 convicted_incorrect_explanation -840.6299 0.001284872
## characteristic outcome
## 1 judge_is_married judgement_convicted_binary
## crime
## 1 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## estimate p.value
## 1 2.728566e-15 0.04525535
## characteristic outcome crime
## 1 judge_is_married punishment_prison_days convicted_participation
## 2 judge_is_married punishment_prison_days convicted_road_traffic_act_other_5
## estimate p.value
## 1 -199.83461 0.002401027
## 2 -71.04465 0.045099605
## characteristic outcome
## 1 judge_is_married punishment_fine
## 2 judge_is_married punishment_fine
## crime
## 1 convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint
## 2 convicted_incorrect_explanation
## estimate p.value
## 1 -3077.3602 0.005058206
## 2 -557.7092 0.018938851
### FUNCTION: descriptive_analysis_2d ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param vars_d1: vector of (string) variables to pivot over
# @param vars_d2: vector of (string) variables to pivot over
#
# @returns: spreadsheet of pivot results
#
# @process: pivot raw_df by each combination of variables in in vars_d1 and vars_d2, calculate means for each
# variable in outcomes_descriptive, and add variable counts.
# Display means broken down by outcomes and each variable combination
descriptive_analysis_2d <- function(raw_df, fp, column, print, vars_d1, vars_d2){
desc_master_df <- data.frame()
#loop over variables in vars_d1
for(v1 in vars_d1){
#loop over variables in vars_d2
for(v2 in vars_d2){
if(v1 == v2) next
if(print) print(paste0(v1, ': ', v2))
#group raw_df by current variables v1 and v2 and calculate mean values for each outcome variable
cur_df <- raw_df %>%
group_by_at(vars(v1, v2)) %>%
summarise_at(.vars = outcomes_descriptive, .funs = mean, na.rm = T)
#calculate count (number of observations) for the variable combinations
cur_df_count <- raw_df %>%
group_by_at(vars(v1, v2)) %>%
count()
#join count and outcome mean table and rename variables
cur_df <- cur_df %>%
left_join(cur_df_count, by = c(v1, v2)) %>%
rename(v1_value = v1,
v2_value = v2) %>%
mutate(v1_value = as.character(v1_value),
v2_value = as.character(v2_value),
v1 = v1,
v2 = v2,
restriction = column)
#bind summary table to master df
desc_master_df <- desc_master_df %>%
bind_rows(cur_df)
}
}
#if print is set to true
if(print){
#pivot master df to longer for graphical display
desc_master_df_short <- desc_master_df %>%
pivot_longer(cols = all_of(outcomes_descriptive), names_to = 'outcome_char', values_to = 'outcome_value') %>%
filter(!is.na(v1_value), !is.na(v2_value))
#loop over variables in vars_d1
for(cur_v1 in vars_d1){
#loop over variables in vars_d2
for(cur_v2 in vars_d2){
#loop over outcome variables in outcomes_descriptive
for(outcome_var in outcomes_descriptive){
#restrict master df by current v1, v2, and outcome variable
cur_df <- desc_master_df_short %>%
dplyr::filter(v1 == cur_v1, v2 == cur_v2, outcome_char == outcome_var)
if(length(unique(cur_df$v2_value)) < 1) next
#plot descriptive results: x-axis are the values that can be taken on by v1, y-axis is the outcome mean
# facets are the values that v2 can take on
p <- ggplot(cur_df, aes(x = v1_value, y = outcome_value))+
geom_bar(stat = 'identity', position = 'dodge')+
facet_wrap(.~v2_value)+
labs(title = paste0(outcome_var, ': ', cur_v1, ' x ', cur_v2),
subtitle = paste0(outcome_var, ' by ', cur_v1, ' (x-axis) \nand ', cur_v2, ' (grid)'),
x = cur_v1,
y = outcome_var)+
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=0))
print(p)
}
}
}
}
#save master df
write.csv(desc_master_df, fp)
#return master df
return(desc_master_df)
}
### FUNCTION: regression_analysis_2d ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param vars_d1: vector of (string) variables
# @param vars_d2: vector of (string) variables
#
# @returns: spreadsheet of regression results
#
# @process: loop over each variable combination in vars_d1 and vars_d2 and each outcome in outcomes_regressions.
# Regress outcome on interaction of variable combination without controls (None), controlling for all variables in vars_d1 and vars_d2,
# and the above-mentioned controls plus dummy variables for each conviction section ('Conv.').
# Display results for each variable combination by outcome and model type (i.e., the level of controls).
regression_analysis_2d <- function(raw_df, fp, column, print, vars_d1, vars_d2){
#set up master df
reg_master_df <- data.frame()
#loop over variables in vars_d1
for(v1 in vars_d1){
#loop over variables in vars_d2
for(v2 in vars_d2){
if(v1 == v2) next
if(print) print(paste0(v1, ': ', v2))
#loop over variables in outcomes_regression
for(outcome in outcomes_regression){
#set up formula for interaction effect between v1 and v2 without any controls
fr <- paste0(outcome, ' ~ ', v1, '*', v2)
#fit; catch errors
cur_fit <- try(lm(formula = fr, data = raw_df))
if(is(cur_fit, 'try-error')){
print(paste0('Failed: ', fr))
next
}
#convert model to df and format variables
cur_fit_df <- cur_fit %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(v1, term), grepl(v2, term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'None',
restriction = column) %>%
separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
#save regression results to master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_df)
#set up control vector by combining vars_d1 and vars_d2 and removing variables currently used for interaction effect
controls <- unique(c(vars_d1, vars_d2))
controls <- controls[!controls %in% c(v1, v2)]
#set up formula with controls
fr_pers <- paste(c(fr, controls), collapse = ' + ')
#run model; catch errors
cur_fit_pers <- try(lm(formula = fr_pers, data = raw_df))
if(is(cur_fit_pers, 'try-error')){
print(paste0('Failed: ', fr))
next
}
#convert to df and format variables
cur_fit_pers_df <- cur_fit_pers %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(v1, term), grepl(v2, term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'Pers.',
restriction = column)%>%
separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
#save results in master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_pers_df)
#set up df and only keep outcome, control and conviction variables
df_restricted <- raw_df %>%
dplyr::select(all_of(c(outcome, v1, v2, controls, sections_regression, circs_regression)))
#set up formula for Conv. model
fr_conv <- paste0(fr, ' + .')
#run model; catch errors
cur_fit_conv <- try(lm(formula = fr_conv, data = df_restricted))
if(is(cur_fit_conv, 'try-error')){
print(paste0('Failed: ', fr))
next
}
#convert model to df and format variables
cur_fit_conv_df <- cur_fit_conv %>%
tidy(conf.int = T, conf.level = 0.95) %>%
as.data.frame() %>%
filter(grepl(v1, term), grepl(v2, term)) %>%
mutate(outcome = outcome,
sig_level = case_when(p.value < 0.01 ~ '***',
p.value < 0.05 ~ '**',
p.value < 0.1 ~ '*',
.default = 'insig'),
controls = 'Conv.',
restriction = column)%>%
separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
#save results to master df
reg_master_df <- reg_master_df %>%
bind_rows(cur_fit_conv_df)
}
}
}
#set up factors for model types
reg_master_df$controls <- factor(reg_master_df$controls, levels = c('None', 'Pers.', 'Conv.'))
#if print is set to true
if(print){
#loop over unique elements in 'judge_char' variable
for(v1 in unique(reg_master_df$judge_char)){
#loop over unique elements in 'defendant_char' variable
for(v2 in unique(reg_master_df$defendant_char)){
#restrict master df to results for the current judge_char and defendant_char
cur_df <- reg_master_df %>%
filter(grepl(v1, term), grepl(v2, term))
if(nrow(cur_df) == 0 | length(unique(cur_df$outcome)) < 1) next
#plot regression results; x-axis is the model type, y-axis is the effect size, color is the significance level,
#facet is the outcome type
p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
geom_point()+
geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
facet_wrap(.~outcome, scales = 'free')+
labs(x = 'Controls',
y = 'Effect Size',
title = paste0(v1, ' x ', v2))
print(p)
}
}
}
#save master df at fp
write.csv(reg_master_df, fp)
#return master df
return(reg_master_df)
}
##RQ 3c: How do lay judge demographics interact with defendant demographics when it comes to equal treatment?
dir.create('../03_results/exploratory/rq3', showWarnings = F)
#run descriptive_analysis_2d for judge and defendant demographic characteristics
df <- descriptive_analysis_2d(df_short, '../03_results/exploratory/rq3/high_level_descriptives.csv', '', TRUE, judge_chars_descriptive, defendant_chars_descriptive)
## [1] "judge_age_groups: defendant_age_groups"
## [1] "judge_age_groups: defendant_is_woman"
## [1] "judge_age_groups: defendant_some_foreign_background"
## [1] "judge_age_groups: defendant_is_married"
## [1] "judge_age_groups: defendant_net_income_quantile"
## [1] "judge_age_groups: case_is_not_oslo"
## [1] "judge_is_woman: defendant_age_groups"
## [1] "judge_is_woman: defendant_is_woman"
## [1] "judge_is_woman: defendant_some_foreign_background"
## [1] "judge_is_woman: defendant_is_married"
## [1] "judge_is_woman: defendant_net_income_quantile"
## [1] "judge_is_woman: case_is_not_oslo"
## [1] "judge_some_foreign_background: defendant_age_groups"
## [1] "judge_some_foreign_background: defendant_is_woman"
## [1] "judge_some_foreign_background: defendant_some_foreign_background"
## [1] "judge_some_foreign_background: defendant_is_married"
## [1] "judge_some_foreign_background: defendant_net_income_quantile"
## [1] "judge_some_foreign_background: case_is_not_oslo"
## [1] "judge_is_married: defendant_age_groups"
## [1] "judge_is_married: defendant_is_woman"
## [1] "judge_is_married: defendant_some_foreign_background"
## [1] "judge_is_married: defendant_is_married"
## [1] "judge_is_married: defendant_net_income_quantile"
## [1] "judge_is_married: case_is_not_oslo"
## [1] "judge_net_income_quantile: defendant_age_groups"
## [1] "judge_net_income_quantile: defendant_is_woman"
## [1] "judge_net_income_quantile: defendant_some_foreign_background"
## [1] "judge_net_income_quantile: defendant_is_married"
## [1] "judge_net_income_quantile: defendant_net_income_quantile"
## [1] "judge_net_income_quantile: case_is_not_oslo"
## [1] "case_is_not_oslo: defendant_age_groups"
## [1] "case_is_not_oslo: defendant_is_woman"
## [1] "case_is_not_oslo: defendant_some_foreign_background"
## [1] "case_is_not_oslo: defendant_is_married"
## [1] "case_is_not_oslo: defendant_net_income_quantile"
#run regression_analysis_2d for judge and defendant demographic characteristics
reg_df <- regression_analysis_2d(df_short, '../03_results/exploratory/rq3/high_level_regression.csv', '', TRUE, judge_chars_regression, defendant_chars_regression)
## [1] "judge_age: defendant_age"
## [1] "judge_age: defendant_is_woman"
## [1] "judge_age: defendant_some_foreign_background"
## [1] "judge_age: defendant_log_net_income"
## [1] "judge_age: case_is_not_oslo"
## [1] "judge_age: defendant_is_married"
## [1] "judge_is_woman: defendant_age"
## [1] "judge_is_woman: defendant_is_woman"
## [1] "judge_is_woman: defendant_some_foreign_background"
## [1] "judge_is_woman: defendant_log_net_income"
## [1] "judge_is_woman: case_is_not_oslo"
## [1] "judge_is_woman: defendant_is_married"
## [1] "judge_some_foreign_background: defendant_age"
## [1] "judge_some_foreign_background: defendant_is_woman"
## [1] "judge_some_foreign_background: defendant_some_foreign_background"
## [1] "judge_some_foreign_background: defendant_log_net_income"
## [1] "judge_some_foreign_background: case_is_not_oslo"
## [1] "judge_some_foreign_background: defendant_is_married"
## [1] "judge_log_net_income: defendant_age"
## [1] "judge_log_net_income: defendant_is_woman"
## [1] "judge_log_net_income: defendant_some_foreign_background"
## [1] "judge_log_net_income: defendant_log_net_income"
## [1] "judge_log_net_income: case_is_not_oslo"
## [1] "judge_log_net_income: defendant_is_married"
## [1] "judge_is_married: defendant_age"
## [1] "judge_is_married: defendant_is_woman"
## [1] "judge_is_married: defendant_some_foreign_background"
## [1] "judge_is_married: defendant_log_net_income"
## [1] "judge_is_married: case_is_not_oslo"
## [1] "judge_is_married: defendant_is_married"